set.seed(1)
#if (!requireNamespace("BiocManager", quietly = TRUE))   
#  install.packages("BiocManager",repos = "http://cran.us.r-project.org")  
#BiocManager::install("fgsea")
#BiocManager::install("limma")
#BiocManager::install("Biobase")
#install.packages("ggplot2")
#install.packages("igraph")
library(limma)
library(Biobase)
library(ggplot2)
library(igraph)
library(data.table)
library(fgsea)
library(readr)
library(biomaRt)
library(edgeR)

# Get portal data and use only gtex portal samples
thyroid_panda_male <- data.frame(fread("/home/ubuntu/GTEx_thyroid_panda/thyroid_panda_male.txt"))[,-3]
thyroid_panda_female <- data.frame(fread("/home/ubuntu/GTEx_thyroid_panda/thyroid_panda_female.txt"))[,-3]

# check if rows in males and females are in the same order
sum(thyroid_panda_male$V1 != thyroid_panda_female$V1)
sum(thyroid_panda_male$V2 != thyroid_panda_female$V2)

net = thyroid_panda_male
net$female = thyroid_panda_female$V4
colnames(net) = c("TF", "gene", "male", "female")
head(net)

# Compute indegree of every gene
library(plyr)
tab.inDegree <- ddply(net, .(gene), numcolwise(sum))
head(tab.inDegree)
dim(tab.inDegree)
rownames(tab.inDegree) <- tab.inDegree[,1]
tab.inDegree <- tab.inDegree[,-1]
head(tab.inDegree)

##############################
genes = rownames(tab.inDegree)

# Get gene data
GTEx_thyroid = get(load("/home/esaha/BONOBO/data/GTEx_thyroid/GTEx_thyroid_RSE.RData"))
fdata.GTEx = as.data.frame(GTEx_thyroid@rowRanges)

# Discard Y genes
### Get GenecIDs for Y genes 
Y_genes.GTEx = fdata.GTEx$gene_id[which(fdata.GTEx$seqnames == "chrY")]
Y_genes.GTEx = gsub("\\..*", "", Y_genes.GTEx)

indegree = tab.inDegree[-which(rownames(tab.inDegree) %in% Y_genes.GTEx),]
genes = genes[-which(genes %in% Y_genes.GTEx)]
gene_id = sub("\\..*", "", fdata.GTEx$gene_id)
gene_names = fdata.GTEx$gene_name[match(genes, gene_id)]

# Save sex difference
sexdiff = indegree$female - indegree$male
sort_mat = order(as.matrix(sexdiff), decreasing = TRUE)
indegree_rank  = as.matrix(sexdiff)[sort_mat[1:length(sort_mat)]]
names(indegree_rank) = gene_names[sort_mat[1:length(sort_mat)]]

indegree_rank[1:10]

# Load KEGG pathways
# pathways <- gmtPathways("/home/ubuntu/c2.cp.kegg.v7.1.symbols.gmt")
pathways <- gmtPathways("/home/ubuntu/c5.go.bp.v2022.1.Hs.symbols.gmt")
#pathways <- gmtPathways("/home/ubuntu/c2.cp.reactome.v2022.1.Hs.symbols.gmt")

fgseaRes <- fgsea(pathways, indegree_rank, minSize=15, maxSize=500)
head(fgseaRes)

save(fgseaRes, file = "/home/esaha/BONOBO/plots/GTEx_thyroid/PANDA_GTEx_Thyroid_GSEA_GOBP.RData")


# Top 50 pathways
# fgseaRes = fgseaRes[order(fgseaRes$padj),]
# fgseaRes = fgseaRes[1:50,]
